1 Plots

  • Hadley Wickham: grammar of graphics

  • Hans Rosling: GapMinder

Gapminder World - Wealth & Health of Nations

1.1 Static: ggplot

1.1.1 Scatterplot

suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(gapminder)
})

# preview data: printing the data frame shows its dimensions, the column
# types, and the first rows
gapminder
## Source: local data frame [1,704 x 6]
## 
##        country continent  year lifeExp      pop gdpPercap
##         (fctr)    (fctr) (int)   (dbl)    (int)     (dbl)
## 1  Afghanistan      Asia  1952  28.801  8425333  779.4453
## 2  Afghanistan      Asia  1957  30.332  9240934  820.8530
## 3  Afghanistan      Asia  1962  31.997 10267083  853.1007
## 4  Afghanistan      Asia  1967  34.020 11537966  836.1971
## 5  Afghanistan      Asia  1972  36.088 13079460  739.9811
## 6  Afghanistan      Asia  1977  38.438 14880372  786.1134
## 7  Afghanistan      Asia  1982  39.854 12881816  978.0114
## 8  Afghanistan      Asia  1987  40.822 13867957  852.3959
## 9  Afghanistan      Asia  1992  41.674 16317921  649.3414
## 10 Afghanistan      Asia  1997  41.763 22227415  635.3414
## ..         ...       ...   ...     ...      ...       ...
# get range of available data: per-column summaries (counts for the factor
# columns; min, quartiles, mean and max for the numeric columns)
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# setup dataframe used by the plots below:
#   - keep only the rows for 2007, the most recent year in the data
#   - add pop_m, the population expressed in millions
g <- gapminder %>%
  filter(year == 2007) %>%
  mutate(pop_m = pop / 1e6)

# plot scatterplot of most recent year: GDP per capita (x) vs life
# expectancy (y), one point per country
s <- ggplot(g, aes(x = gdpPercap, y = lifeExp)) + geom_point()
s

# add aesthetic of size by population (in millions)
s <- s + aes(size = pop_m)
s

# add aesthetic of color by continent
s <- s + aes(color = continent)
s

# add title, update axes labels
s <- s +
  ggtitle("Health & Wealth of Nations for 2007") +
  xlab("GDP per capita ($/year)") +
  ylab("Life expectancy (years)")
s

# label legend: give the color and size legends readable titles
s <- s +
  scale_colour_discrete(name = "Continent") +
  scale_size_continuous(name = "Population (M)")
s

Your Turn

Now with country emissions datasets…

1.1.2 Boxplot

# boxplot by continent: distribution of life expectancy within each continent
b <- ggplot(g, aes(x = continent, y = lifeExp)) + geom_boxplot()
b

# match color to continents, like scatterplot
b <- b + aes(fill = continent)
b

# drop legend, add title, update axes labels
# (the legend is redundant here: the x axis already names each continent)
b <- b +
  theme(legend.position = "none") +
  ggtitle("Life Expectancy by Continent for 2007") +
  xlab("Continent") +
  ylab("Life expectancy (years)")
b

Your Turn: Make a similar plot but for gdpPercap. Be sure to update the plot’s aesthetic, axis label and title accordingly.

1.2 Interactive: plotly

ggplot2 | plotly

suppressPackageStartupMessages({
  library(plotly) # install.packages('plotly')
})

# scatterplot (Note: key=country shows up on rollover)
# rebuilds `s` as a bare scatterplot carrying the extra `key` aesthetic,
# then hands it to ggplotly()
s <- ggplot(g, aes(x = gdpPercap, y = lifeExp, key = country)) +
  geom_point()
ggplotly(s)

# boxplot: reuse the ggplot object `b` built in the boxplot section
ggplotly(b)

Your Turn: Expand the interactive scatterplot to include all the other bells and whistles of the previous plot in one continuous set of code (without reassigning s between steps).

1.3 Interactive: Exploding Boxplot

library(explodingboxplotR) # devtools::install_github('timelyportfolio/explodingboxplotR')

# exploding boxplot of life expectancy from the 2007 data frame `g`,
# grouped and colored by continent, using country as the label
exploding_boxplot(
  g,
  y = "lifeExp",
  group = "continent",
  color = "continent",
  label = "country"
)

1.4 Interactive: Motion Plot

The googleVis package ports most of the Google charts functionality.

Every R chunk that draws a googleVis plot must set the chunk option results='asis'. Also, once before any googleVis plots, set op <- options(gvis.plot.tag='chart').

suppressPackageStartupMessages({
  library(googleVis) # install.packages('googleVis')
})
# set once before any googleVis plots; `op` holds the previous option values
op <- options(gvis.plot.tag = "chart")

# motion chart over all years of gapminder (not just 2007), with two derived
# columns: population in millions and the natural log of GDP per capita
m <- gvisMotionChart(
  mutate(gapminder, pop_m = pop / 1e6, log_gdpPercap = log(gdpPercap)),
  idvar = "country",
  timevar = "year",
  xvar = "log_gdpPercap",
  yvar = "lifeExp",
  colorvar = "continent",
  sizevar = "pop_m"
)
plot(m)

Your Turn: Repeat the motion chart with the country having the highest gdpPercap filtered out.

2 Maps: tmap

Thematic maps

2.1 Static

library(tmap) # install.packages('tmap')

# load world spatial polygons (the `World` dataset)
data(World)

# inspect values in World: show the attribute table held in its `data` slot
tbl_df(World@data)
## Source: local data frame [177 x 15]
## 
##    iso_a3                   name           sovereignt
##    (fctr)                 (fctr)               (fctr)
## 1     AFG            Afghanistan          Afghanistan
## 2     AGO                 Angola               Angola
## 3     ALB                Albania              Albania
## 4     ARE   United Arab Emirates United Arab Emirates
## 5     ARG              Argentina            Argentina
## 6     ARM                Armenia              Armenia
## 7     ATA             Antarctica           Antarctica
## 8     ATF Fr. S. Antarctic Lands               France
## 9     AUS              Australia            Australia
## 10    AUT                Austria              Austria
## ..    ...                    ...                  ...
## Variables not shown: continent (fctr), subregion (fctr), area (dbl),
##   pop_est (dbl), pop_est_dens (dbl), gdp_md_est (dbl), gdp_cap_est (dbl),
##   economy (fctr), income_grp (fctr), life_exp (dbl), well_being (dbl), HPI
##   (dbl)
# gapminder countries whose `country` has no exact match among World `name`
# values, largest population first. These are left unmatched for now
# (no name reconciliation is done).
g %>%
  anti_join(World@data, by = c("country" = "name")) %>%
  arrange(desc(pop))
## Warning in anti_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
## Source: local data frame [19 x 7]
## 
##                     country continent  year lifeExp      pop  gdpPercap
##                      (fctr)    (fctr) (int)   (dbl)    (int)      (dbl)
## 1          Congo, Dem. Rep.    Africa  2007  46.462 64606759   277.5519
## 2               Korea, Rep.      Asia  2007  78.623 49044790 23348.1397
## 3          Korea, Dem. Rep.      Asia  2007  67.297 23301725  1593.0655
## 4               Yemen, Rep.      Asia  2007  62.698 22211743  2280.7699
## 5            Czech Republic    Europe  2007  76.486 10228744 22833.3085
## 6        Dominican Republic  Americas  2007  72.235  9319622  6025.3748
## 7          Hong Kong, China      Asia  2007  82.208  6980412 39724.9787
## 8           Slovak Republic    Europe  2007  74.663  5447502 18678.3144
## 9                 Singapore      Asia  2007  79.972  4553009 47143.1796
## 10   Bosnia and Herzegovina    Europe  2007  74.852  4552198  7446.2988
## 11 Central African Republic    Africa  2007  44.741  4369038   706.0165
## 12       West Bank and Gaza      Asia  2007  73.422  4018332  3025.3498
## 13              Congo, Rep.    Africa  2007  55.322  3800610  3632.5578
## 14                Mauritius    Africa  2007  72.801  1250882 10956.9911
## 15                  Reunion    Africa  2007  76.442   798094  7670.1226
## 16                  Comoros    Africa  2007  65.152   710960   986.1479
## 17                  Bahrain      Asia  2007  75.635   708573 29796.0483
## 18        Equatorial Guinea    Africa  2007  51.579   551201 12154.0897
## 19    Sao Tome and Principe    Africa  2007  65.528   199579  1598.4351
## Variables not shown: pop_m (dbl)
# World countries whose `name` has no exact match among gapminder `country`
# values, largest estimated population first. These are left unmatched for
# now (no name reconciliation is done).
World@data %>%
  anti_join(g, by = c("name" = "country")) %>%
  arrange(desc(pop_est)) %>%
  select(iso_a3, name, pop_est)
## Warning in anti_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
##    iso_a3                   name   pop_est
## 1     RUS                 Russia 140041247
## 2     COD        Dem. Rep. Congo  68692542
## 3     KOR                  Korea  48508972
## 4     UKR                Ukraine  45700395
## 5     UZB             Uzbekistan  27606007
## 6     YEM                  Yemen  23822783
## 7     PRK        Dem. Rep. Korea  22665345
## 8     KAZ             Kazakhstan  15399437
## 9     SSD               S. Sudan  10625176
## 10    CZE             Czech Rep.  10211904
## 11    DOM         Dominican Rep.   9650054
## 12    BLR                Belarus   9648533
## 13    AZE             Azerbaijan   8238672
## 14    TJK             Tajikistan   7349145
## 15    LAO                   Laos   6834942
## 16    PNG       Papua New Guinea   6057263
## 17    SVK               Slovakia   5463046
## 18    KGZ             Kyrgyzstan   5431747
## 19    TKM           Turkmenistan   4884887
## 20    ARE   United Arab Emirates   4798491
## 21    GEO                Georgia   4615807
## 22    BIH       Bosnia and Herz.   4613414
## 23    CAF   Central African Rep.   4511488
## 24    MDA                Moldova   4320748
## 25    PSE              Palestine   4119083
## 26    COG                  Congo   4012809
## 27    LTU              Lithuania   3555179
## 28   <NA>             Somaliland   3500000
## 29    ARM                Armenia   2967004
## 30    LVA                 Latvia   2231503
## 31    MKD              Macedonia   2066718
## 32   <NA>                 Kosovo   1804838
## 33    EST                Estonia   1299371
## 34    TLS            Timor-Leste   1131612
## 35    FJI                   Fiji    944720
## 36    QAT                  Qatar    833285
## 37    GUY                 Guyana    772298
## 38    BTN                 Bhutan    691141
## 39    GNQ             Eq. Guinea    650702
## 40    SLB            Solomon Is.    595613
## 41    CYP                 Cyprus    531640
## 42    LUX             Luxembourg    491775
## 43    SUR               Suriname    481267
## 44    BRN                 Brunei    388190
## 45    BHS                Bahamas    309156
## 46    BLZ                 Belize    307899
## 47   <NA>              N. Cyprus    265100
## 48    NCL          New Caledonia    227436
## 49    VUT                Vanuatu    218519
## 50    GRL              Greenland     57600
## 51    ATA             Antarctica      3802
## 52    FLK           Falkland Is.      3140
## 53    ATF Fr. S. Antarctic Lands       140
## 54    ESH              W. Sahara        NA
# join gapminder data to World: add the columns of `g` to World's attribute
# table, matching World `name` against gapminder `country`
World@data <- left_join(World@data, g, by = c("name" = "country"))
## Warning in left_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
# make map: fill each World polygon by its `lifeExp` value using the
# red-yellow-green palette, identify polygons by `name`, and apply the gray
# style and the World format
m <- tm_shape(World) +
  tm_polygons(
    "lifeExp",
    palette = "RdYlGn",
    id = "name",
    title = "Life expectancy (years)",
    # spelled out as FALSE: `F` is an ordinary variable that can be reassigned
    auto.palette.mapping = FALSE
  ) +
  tm_style_gray() +
  tm_format_World()
m

2.2 Interactive

# show interactive map: pass the tmap object `m` to tmap_leaflet()
tmap_leaflet(m)